GOAL

dbSNP variants

Difference in Uniquely mapped %

by reference

## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé
## Warning in max(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_nonrandomized_genotypes_maternal"
## [2] "Inf"                                           
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_nonrandomized_genotypes_paternal"
## [2] "Inf"                                           
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_nonrandomized_indels_maternal"
## [2] "Inf"                                        
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_nonrandomized_indels_paternal"
## [2] "Inf"                                        
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_nonrandomized_indelsSNVs_maternal"
## [2] "Inf"                                            
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_nonrandomized_indelsSNVs_paternal"
## [2] "Inf"                                            
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_nonrandomized_SNVs_maternal"
## [2] "Inf"                                      
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_nonrandomized_SNVs_paternal"
## [2] "Inf"                                      
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_randomized_genotypes_maternal"
## [2] "Inf"                                        
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_randomized_genotypes_paternal"
## [2] "Inf"                                        
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_randomized_indels_maternal"
## [2] "Inf"                                     
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_randomized_indels_paternal"
## [2] "Inf"                                     
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_randomized_indelsSNVs_maternal"
## [2] "Inf"                                         
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_randomized_indelsSNVs_paternal"
## [2] "Inf"                                         
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_randomized_SNVs_maternal"
## [2] "Inf"                                   
## [3] "-Inf"
## Warning in min(x, na.rm = na.rm): aucun argument trouvé pour min ; Inf est
## renvoyé

## Warning in min(x, na.rm = na.rm): aucun argument pour max ; -Inf est renvoyé

## [1] "DBA_2J_genome_randomized_SNVs_paternal"
## [2] "Inf"                                   
## [3] "-Inf"
## [1] 4.55 5.74
## [1] -5.84 -3.44

Difference in Unmapped (too many mismatches) %

Difference in Unmapped (too short) %

Genotypes

Preparation

path <- "F:/BXD/data/MappingEvaluation"
statsName <- "Uniquely mapped reads % |"
lines <- read.table("F:/BXD/data/lines.txt", stringsAsFactors=FALSE)$V1
# remove line 63
lines <- setdiff(lines, "BXD63")

# list samples
samples <- c()
getSamplesNames <- function(x,y){paste0(x,linenumber,y)}
for(linenumber in sub("BXD","",lines)){
  samples <- c(samples, c(t(outer(c("","L"),c("nsd","sd"),getSamplesNames))))
}

# retrieve uniquely mapped reads percentage on genotypesandimputed-based customized references
ref_lines <- c(sapply(lines,rep,4))
reference <- "genotypesandimputed_withoutannotation_EndToEnd_1_0"
conditions <- as.factor(rep(c("nsd","sd"),length(samples)/2))
tissues <- as.factor(rep(sort(rep(c("C","L"),2)),length(samples)/4))
genotypesandimputed <- retrieveStats(reference, paste0(samples, "_"), path, statsName)

# retrieve uniquely mapped reads percentage on genotypes-based customized references
path <- "F:/BXD/data/transcriptome/2_Mapping_STAR/OnPersonalizedRef"
references <- paste0(ref_lines, "_genome_nonrandomized_genotypes_paternal")
genotypes <- retrieveStats(references, paste0(samples, "_"), path, statsName)

# retrieve uniquely mapped reads percentage on B6 mm10 primary assembly
path <- "F:/BXD/data/transcriptome/2_Mapping_STAR/OnGenome/"
references <- "B6mm10_primaryassembly"
B6mm10 <- retrieveStats(references, paste0(samples, "_exactunique"), path, statsName)

# assemble dataset
geno_restrictive <- data.frame(samples, ref_lines, tissues, conditions, genotypesandimputed, genotypes, B6mm10)
geno_restrictive

Consequence of customization

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.440   2.325   3.000   2.934   3.562   4.270
## [1] 1.44 4.27
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.00000 0.01000 0.01000 0.01038 0.01000 0.03000
## [1] 0.00 0.03

eQTL results

We want to answer the question: Are these BXD-specific references better than the non-BXD specific reference (B6 mm10 primary assembly)?

The idea is to look the influence of the mapping downstream. The cis-eQTL detection was chosen because directly downstream gene counting and normalization, and more likely to be impacted by BXD-specific mapping.

The references:

Permissive setting

The setting “genotypesandimputed_withannotation_Local_0_10” is closest to the default/standard mapping setting, except that the references are BXD-specific. This setting is referred to as “permissive”.

Results

BXD-specific references does not improve the eQTL metrics.

Discussion

Is this metric adapted for this comparison? Either the BXD-specific references strategy is not improving eQTL (or not well implemented maybe genotypes imputation is not good enough to see a benefit) OR metric is not adapted because of a systematic bias. Hypothesis: reference mapping bias causes erroneous eQTL calls. If this hypothesis is correct, erroneous eQTL calls are more likely due to a (biased) higher expression level in samples with the reference allele and with genomic variant overlapping with the gene. (It could also be due to more complex scenarios were variants in another genomic regions makes a gene more/less unique).

Exploring differences between eQTLs obtain with B6 reference and with BXD-specific references

Detecting potential reference bias

Trying to see if there is a reference bias. Assumes the number of eQTLs with B6 or D2 should be equivalent if there is no reference bias. It is more logical to use only significant eQTLs.

Exploring plots and stats for Cortex NSD (log2CPM).

## 
##   -1    1 
## 2671 2357

## [1] 0.1427879

## 
##   -1    1 
## 2585 2446

## [1] 0.148702

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -4.24513 -0.13465 -0.03610 -0.03644  0.10980  3.08676
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## -1.921090 -0.108597 -0.051683 -0.004101  0.026183  2.984500
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max.     NA's 
## -0.24400 -0.03601  0.02425  0.01882  0.05329  0.37951       11

## 
## FALSE  TRUE  <NA> 
##    52   132    10
## 
## FALSE  TRUE  <NA> 
##    37   149    11
## 
## FALSE  TRUE 
##   107  4727

Session information

## R version 3.5.3 (2019-03-11)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 18363)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=French_Switzerland.1252  LC_CTYPE=French_Switzerland.1252   
## [3] LC_MONETARY=French_Switzerland.1252 LC_NUMERIC=C                       
## [5] LC_TIME=French_Switzerland.1252    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] vioplot_0.3.5 zoo_1.8-7     sm_2.2-5.6    knitr_1.30   
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-38 digest_0.6.25   grid_3.5.3      jsonlite_1.7.1 
##  [5] magrittr_1.5    evaluate_0.14   rlang_0.4.7     stringi_1.4.6  
##  [9] rmarkdown_2.4   tools_3.5.3     stringr_1.4.0   xfun_0.18      
## [13] yaml_2.2.1      compiler_3.5.3  tcltk_3.5.3     htmltools_0.5.0